# 导入所需模块
import sys
sys.path.append("..")
from baseset import *
from getdata import getdata
import hcplot as hc
import pandas as pd
# 获取数据
fpath ='./data/w_datadic_ru.xlsx'
df = getdata(fpath,startdate='2005-01-01')
# 加工数据
df['Wind橡胶指数(月均)'] = df['Wind橡胶指数'].resample('M').mean()
df['上期所天胶库存'] = round(df['上期所天胶库存'] / 10000, 2)
df['期现价差'] = df['Wind橡胶指数'] - df['云南全乳胶(上海)']
df['重卡销量'] = df['重型货车销量'] + df['半挂牵引车销量'] + df['非完整车辆销量']
df['月度供需差:中国'] = df['天胶产量:中国'] + df['天胶进口:中国'] - df['天胶消费:中国']
df0 = (df.tail(10)).T.fillna('')
df0.columns = (df0.columns.map(lambda t: t.strftime('%Y-%m-%d')))
df0
# 量化评分
q_df = pd.DataFrame()
q_df['沪胶'] = df['Wind橡胶指数']
q_df['商品'] = df['Wind商品指数']
q_df['美指'] = df['美元指数']
q_df['替代品'] = df['顺丁橡胶(中石化华北)']
q_df['日胶'] = df['日胶(活跃合约收盘)']
q_df['库存'] = df['青岛保税区橡胶库存'].fillna(method='ffill')
# q_df['重卡'] = df['重卡销量']
# q_df['期现差'] = df['期现价差']
q_df = q_df.dropna()
q_df['沪胶_MA'] = pd.rolling_mean(q_df['沪胶'], 60)
q_df['商品_MA'] = pd.rolling_mean(q_df['商品'], 60)
q_df['美指_MA'] = pd.rolling_mean(q_df['美指'], 60)
q_df['替代品_MA'] = pd.rolling_mean(q_df['替代品'], 60)
q_df['日胶_MA'] = pd.rolling_mean(q_df['日胶'], 60)
q_df['库存变化'] = q_df['库存'] - q_df['库存'].shift(12)
# 设置各指标权重
q_df['商品_得分'] = 0
q_df.loc[q_df.商品 > q_df.商品_MA,'商品_得分'] = 0.2
q_df.loc[q_df.商品 < q_df.商品_MA,'商品_得分'] = -0.2
q_df['美指_得分'] = 0
q_df.loc[q_df.美指 > q_df.美指_MA,'美指_得分'] = -0.2
q_df.loc[q_df.美指 < q_df.美指_MA,'美指_得分'] = 0.2
q_df['替代品_得分'] = 0
q_df.loc[q_df.替代品 > q_df.替代品_MA,'替代品_得分'] = 0.2
q_df.loc[q_df.替代品 < q_df.替代品_MA,'替代品_得分'] = -0.2
q_df['日胶_得分'] = 0
q_df.loc[q_df.日胶 > q_df.日胶_MA,'日胶_得分'] = 0.2
q_df.loc[q_df.日胶 < q_df.日胶_MA,'日胶_得分'] = -0.2
q_df['库存_得分'] = None
q_df.loc[q_df.库存变化 > 0,'库存_得分'] = -0.2
q_df.loc[q_df.库存变化 < 0,'库存_得分'] = 0.2
q_df['库存_得分'] = q_df['库存_得分'].fillna(method='ffill')
# 周期得分规则:3-8月偏空,9-2偏多
q_df['周期_得分'] = 0.2
q_df.loc[q_df.index.map(lambda t: t.month in [3,4,5,6,7,8]),'周期_得分'] = -0.2
q_df['多空信号'] = q_df['商品_得分'] + q_df['美指_得分'] + q_df['日胶_得分'] + q_df['替代品_得分'] + q_df['库存_得分']
q_df.iloc[:,[0,12,13,14,15,16,17,18]].tail()
hc.y2(q_df[['沪胶','多空信号']],y2list=[1],ldic={1:'column'})
# q_df.index = (q_df.index.map(lambda t: t.strftime('%Y-%m-%d')))
# q_df.to_excel("./data/q_zc.xlsx") # 输出到excel文件
pz = ['Wind橡胶指数','Wind商品指数','布油']
hc.bfb(df[pz])
pz = ['Wind橡胶指数','美元指数','美元兑人民币(中间价)']
hc.y22(df[pz[:2]],addtype='corr')
hc.y22(df[pz[::2]],addtype='corr')
pz = ['Wind橡胶指数','沪深300']
hc.y22(df[pz],addtype='corr')
pz='Wind橡胶指数'
# hc.season_bfb(df[pz],title=pz)
hc.season(df[pz],title=pz,ldic={13:[True,'square',4]},mondata=True)
# hc.season(df[pz],title=pz,ltype='column',mondata=True,showlst=[11,12,13])
pz = ['沪胶(活跃合约收盘)','日胶(活跃合约收盘)']
hc.bfb2(df[pz],addtype='corr')
pz=['Wind橡胶指数']
# ----获得事件数据-----
fpath ='./data/事件备忘.xlsx'
with open(fpath, "rb") as f:
sjdf = pd.read_excel(f,sheet_name=[0,1],names=['x','kind','title','text','bz'],usecols=[0,1,2,3,4])
df0 = sjdf[0]
# df1 = sjdf[1]
df0 = df0[['x','title','text']][(df0.kind=='天胶') & (df0.title=='政')] # 根据事件涉及品种进行筛选
# df1 = df1[['x','title','text']][(df1.kind=='天胶') & (df1.bz=='单边')]
df0 = df0.to_dict(orient='records')
# df1 = df1.to_dict(orient='records')
hc.y1(df[pz],sjdata=df0,sele=5)
hc.y2(df[['期现价差']],sele=5)
从历史数据来看,期现价差核心波动区间分两个阶段:2013年前,在(-2000,2000);2013年开始在(0,2000)。如果背离该区间,向均值回归概率较大。
pz = ['沪胶1月','沪胶5月','沪胶9月']
legend = ['1月-5月','5月-9月', '9月-1月']
df1 = df[pz[:2]].dropna()
df1[legend[0]] = df1[pz[0]]-df1[pz[1]] # 5月-1月
df1 = df1[df1.index.map(lambda t: t.month in [6,7,8,9,10,11,12])]
df2 = df[pz[1:]].dropna()
df2[legend[1]] = df2[pz[1]]-df2[pz[2]] # 9月-5月
df2 = df2[df2.index.map(lambda t: t.month in [1,2,3,4,10,11,12])]
df3 = df[pz[::2]].dropna()
df3[legend[2]] = df3[pz[2]]-df3[pz[0]] # 1月-9月
df3 = df3[df3.index.map(lambda t: t.month in [2,3,4,5,6,7,8])]
# # ----获得事件数据-----
# fpath ='./data/事件备忘.xlsx'
# with open(fpath, "rb") as f:
# sjdf = pd.read_excel(f,sheet_name=[0,1],names=['x','kind','title','text','bz'],usecols=[0,1,2,3,4])
# df4 = sjdf[0]
# df5 = sjdf[1]
# df4 = df4[['x','title','text']][df4.kind=='郑煤'] # 根据事件涉及品种进行筛选
# df5 = df5[['x','title','text']][(df5.kind=='郑煤') & (df5.bz=='套利')]
# df4 = df4.to_dict(orient='records')
# df5 = df5.to_dict(orient='records')
hc.y2(df1,y2list=[2],ldic={2:'column'})
hc.y2(df2,y2list=[2],ldic={2:'column'})
hc.y2(df3,y2list=[2],ldic={2:'column'})
【说明】 基于流动性和对比一致性考虑,数据剔除了跨中间合约的月份数据,比如9-1只取2,3,4,5,6,7,8月的数据。
pz = ['Wind橡胶指数(月均)','天胶产量:中国','天胶进口:中国','天胶消费:中国', '月度供需差:中国']
mydf = df[pz].dropna(how='any')
# mydf[pz[4] + '同比'] = round((mydf[pz[4]] - mydf[pz[4]].shift(12)) / mydf[pz[4]].shift(12),2)
mydf.tail().fillna('')
pz1 = pz[2]
hc.season(mydf[pz1],title=pz1 + '(单位:千吨)',ltype='column',mondata=True,showlst=[8,9,10])
# hc.y22(mydf.iloc[:,[0,5]],sele=5,addtype='corr',cf=12)
hc.y2(mydf[pz[::4]],sele=5)
pz = ['天胶产量:泰国','天胶产量:印尼','天胶产量:马来西亚', '天胶产量:越南','天胶产量:ANRPC']
mydf = df[pz].dropna(how='any')
mydf.tail().fillna('')
pz1 = '天胶产量:泰国'
hc.season(mydf[pz1],title=pz1 + '(单位:千吨)',ltype='column',mondata=True,showlst=[5,6,7])
pz = ['天胶出口:泰国','天胶出口:印尼','天胶出口:马来西亚', '天胶出口:越南','天胶出口:ANRPC']
mydf = df[pz].dropna(how='any')
mydf.tail().fillna('')
pz1 = '天胶出口:泰国'
hc.season(mydf[pz1],title=pz1 + '(单位:千吨)',ltype='column',mondata=True,showlst=[5,6,7])
pz = ['青岛保税区橡胶库存','上期所天胶库存']
mydf = df[pz].dropna(how = 'any')
mydf.tail().fillna('')
pz1 = '青岛保税区橡胶库存'
hc.season(mydf[pz1],title = pz1 + '(单位:万吨)',showlst = [6,7,8])
pz = ['Wind橡胶指数(月均)','重卡销量']
mydf = df[pz].dropna()
mydf[pz[1] + '同比'] = round((mydf[pz[1]] - mydf[pz[1]].shift(12)) / mydf[pz[1]].shift(12),2)
hc.y22(mydf.iloc[:,[0,2]],sele=5,addtype='corr',cf=12)
pz=['Wind橡胶指数']
# ----获得事件数据-----
fpath ='./data/事件备忘.xlsx'
with open(fpath, "rb") as f:
sjdf = pd.read_excel(f,sheet_name=[0,1],names=['x','kind','title','text','bz'],usecols=[0,1,2,3,4])
df0 = sjdf[0]
# df1 = sjdf[1]
df0 = df0[['x','title','text']][(df0.kind=='天胶') & (df0.title=='气')] # 根据事件涉及品种进行筛选
# df1 = df1[['x','title','text']][(df1.kind=='天胶') & (df1.bz=='单边')]
df0 = df0.to_dict(orient='records')
# df1 = df1.to_dict(orient='records')
hc.y1(df[pz],sjdata=df0,sele=5)
# 基本参数设置
label1='沪胶(活跃合约收盘)'
label2='顺丁橡胶(中石化华北)'
label3='丁苯橡胶(中石化华北)'
label4='丁二烯(中石化华北)'
legend = ['沪胶-顺丁','沪胶-丁苯','沪胶-丁二烯']
mydf = df[[label1,label2,label3,label4]]
# 叠加图
hc.bfb(mydf,sele=4)
mydf = mydf.dropna(how='any')
# 相关分析
df1 = mydf[label1]
df2 = mydf[label2]
df3 = mydf[label3]
df4 = mydf[label4]
z1 = df1.rolling(window=252).corr(other=df2)
z2 = df1.rolling(window=252).corr(other=df3)
z3 = df1.rolling(window=252).corr(other=df4)
mydf = pd.concat([z1, z2,z3], axis=1, join='inner')
mydf.columns=legend
hc.y2(mydf,y2list=[],sele=4)
# 基本参数设置
label1='沪胶(活跃合约收盘)'
label2='顺丁橡胶(中石化华北)'
label3='丁苯橡胶(中石化华北)'
legend = ['沪胶-顺丁','沪胶-丁苯','沪胶']
# 数据清洗
df1 = df[label1] - df[label2]
df2 = df[label1] - df[label3]
df3 = df[label1]
mydf = pd.concat([df1, df2,df3], axis=1, join='inner')
mydf.columns = legend
hc.y2(mydf,y2list=[2],sele=4)
pz = ['Wind橡胶指数','中化国际','海南橡胶']
hc.bfb(df[pz])